{
  "nbformat": 4,
  "nbformat_minor": 0,
  "metadata": {
    "colab": {
      "name": "AutoEncoders.ipynb",
      "provenance": [],
      "collapsed_sections": [],
      "toc_visible": true,
      "include_colab_link": true
    },
    "kernelspec": {
      "name": "python3",
      "display_name": "Python 3"
    },
    "accelerator": "GPU"
  },
  "cells": [
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "view-in-github",
        "colab_type": "text"
      },
      "source": [
        "<a href=\"https://colab.research.google.com/github/ayush-09/AutoEncoder-Deep-Learning/blob/main/AutoEncoders.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "K4f4JG1gdKqj"
      },
      "source": [
        "#AutoEncoders"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "rjOPzue7FCXJ",
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "outputId": "c488b69b-9cc1-4cd7-b954-0928c41b410d"
      },
      "source": [
        "!wget \"http://files.grouplens.org/datasets/movielens/ml-100k.zip\"\n",
        "!unzip ml-100k.zip\n",
        "!ls"
      ],
      "execution_count": 1,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "--2021-01-09 14:08:13--  http://files.grouplens.org/datasets/movielens/ml-100k.zip\n",
            "Resolving files.grouplens.org (files.grouplens.org)... 128.101.65.152\n",
            "Connecting to files.grouplens.org (files.grouplens.org)|128.101.65.152|:80... connected.\n",
            "HTTP request sent, awaiting response... 200 OK\n",
            "Length: 4924029 (4.7M) [application/zip]\n",
            "Saving to: ‘ml-100k.zip’\n",
            "\n",
            "ml-100k.zip         100%[===================>]   4.70M  18.8MB/s    in 0.2s    \n",
            "\n",
            "2021-01-09 14:08:13 (18.8 MB/s) - ‘ml-100k.zip’ saved [4924029/4924029]\n",
            "\n",
            "Archive:  ml-100k.zip\n",
            "   creating: ml-100k/\n",
            "  inflating: ml-100k/allbut.pl       \n",
            "  inflating: ml-100k/mku.sh          \n",
            "  inflating: ml-100k/README          \n",
            "  inflating: ml-100k/u.data          \n",
            "  inflating: ml-100k/u.genre         \n",
            "  inflating: ml-100k/u.info          \n",
            "  inflating: ml-100k/u.item          \n",
            "  inflating: ml-100k/u.occupation    \n",
            "  inflating: ml-100k/u.user          \n",
            "  inflating: ml-100k/u1.base         \n",
            "  inflating: ml-100k/u1.test         \n",
            "  inflating: ml-100k/u2.base         \n",
            "  inflating: ml-100k/u2.test         \n",
            "  inflating: ml-100k/u3.base         \n",
            "  inflating: ml-100k/u3.test         \n",
            "  inflating: ml-100k/u4.base         \n",
            "  inflating: ml-100k/u4.test         \n",
            "  inflating: ml-100k/u5.base         \n",
            "  inflating: ml-100k/u5.test         \n",
            "  inflating: ml-100k/ua.base         \n",
            "  inflating: ml-100k/ua.test         \n",
            "  inflating: ml-100k/ub.base         \n",
            "  inflating: ml-100k/ub.test         \n",
            "ml-100k  ml-100k.zip  sample_data\n"
          ],
          "name": "stdout"
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "LOly1yfAfTjd",
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "outputId": "97173fc2-9434-4529-9841-2bb989507c05"
      },
      "source": [
        "!wget \"http://files.grouplens.org/datasets/movielens/ml-1m.zip\"\n",
        "!unzip ml-1m.zip\n",
        "!ls"
      ],
      "execution_count": 2,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "--2021-01-09 14:08:24--  http://files.grouplens.org/datasets/movielens/ml-1m.zip\n",
            "Resolving files.grouplens.org (files.grouplens.org)... 128.101.65.152\n",
            "Connecting to files.grouplens.org (files.grouplens.org)|128.101.65.152|:80... connected.\n",
            "HTTP request sent, awaiting response... 200 OK\n",
            "Length: 5917549 (5.6M) [application/zip]\n",
            "Saving to: ‘ml-1m.zip’\n",
            "\n",
            "ml-1m.zip           100%[===================>]   5.64M  22.1MB/s    in 0.3s    \n",
            "\n",
            "2021-01-09 14:08:25 (22.1 MB/s) - ‘ml-1m.zip’ saved [5917549/5917549]\n",
            "\n",
            "Archive:  ml-1m.zip\n",
            "   creating: ml-1m/\n",
            "  inflating: ml-1m/movies.dat        \n",
            "  inflating: ml-1m/ratings.dat       \n",
            "  inflating: ml-1m/README            \n",
            "  inflating: ml-1m/users.dat         \n",
            "ml-100k  ml-100k.zip  ml-1m  ml-1m.zip\tsample_data\n"
          ],
          "name": "stdout"
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "_LvGeU1CeCtg"
      },
      "source": [
        "import numpy as np\n",
        "import pandas as pd\n",
        "import torch\n",
        "import torch.nn as nn\n",
        "import torch.nn.parallel\n",
        "import torch.optim as optim\n",
        "import torch.utils.data\n",
        "from torch.autograd import Variable"
      ],
      "execution_count": 3,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "UJw2p3-Cewo4"
      },
      "source": [
        "# We won't be using this dataset.\n",
        "movies = pd.read_csv('ml-1m/movies.dat', sep = '::', header = None, engine = 'python', encoding = 'latin-1')\n",
        "users = pd.read_csv('ml-1m/users.dat', sep = '::', header = None, engine = 'python', encoding = 'latin-1')\n",
        "ratings = pd.read_csv('ml-1m/ratings.dat', sep = '::', header = None, engine = 'python', encoding = 'latin-1')"
      ],
      "execution_count": 4,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "2usLKJBEgPE2"
      },
      "source": [
        "training_set = pd.read_csv('ml-100k/u1.base', delimiter = '\\t')\n",
        "training_set = np.array(training_set, dtype = 'int')\n",
        "test_set = pd.read_csv('ml-100k/u1.test', delimiter = '\\t')\n",
        "test_set = np.array(test_set, dtype = 'int')"
      ],
      "execution_count": 6,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "gPaGZqdniC5m"
      },
      "source": [
        "nb_users = int(max(max(training_set[:, 0], ), max(test_set[:, 0])))\n",
        "nb_movies = int(max(max(training_set[:, 1], ), max(test_set[:, 1])))"
      ],
      "execution_count": 7,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "-wASs2YFiDaa"
      },
      "source": [
        "def convert(data):\n",
        "  new_data = []\n",
        "  for id_users in range(1, nb_users + 1):\n",
        "    id_movies = data[:, 1] [data[:, 0] == id_users]\n",
        "    id_ratings = data[:, 2] [data[:, 0] == id_users]\n",
        "    ratings = np.zeros(nb_movies)\n",
        "    ratings[id_movies - 1] = id_ratings\n",
        "    new_data.append(list(ratings))\n",
        "  return new_data\n",
        "training_set = convert(training_set)\n",
        "test_set = convert(test_set)"
      ],
      "execution_count": 8,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "TwD-KD8yiEEw"
      },
      "source": [
        "training_set = torch.FloatTensor(training_set)\n",
        "test_set = torch.FloatTensor(test_set)"
      ],
      "execution_count": 9,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "oU2nyh76iE6M"
      },
      "source": [
        "class SAE(nn.Module):\n",
        "    def __init__(self, ):\n",
        "        super(SAE, self).__init__()\n",
        "        self.fc1 = nn.Linear(nb_movies, 20)\n",
        "        self.fc2 = nn.Linear(20, 10)\n",
        "        self.fc3 = nn.Linear(10, 20)\n",
        "        self.fc4 = nn.Linear(20, nb_movies)\n",
        "        self.activation = nn.Sigmoid()\n",
        "    def forward(self, x):\n",
        "        x = self.activation(self.fc1(x))\n",
        "        x = self.activation(self.fc2(x))\n",
        "        x = self.activation(self.fc3(x))\n",
        "        x = self.fc4(x)\n",
        "        return x\n",
        "sae = SAE()\n",
        "criterion = nn.MSELoss()\n",
        "optimizer = optim.RMSprop(sae.parameters(), lr = 0.01, weight_decay = 0.5)"
      ],
      "execution_count": 10,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "FEz9hRaciFTs",
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "outputId": "8ffae3d0-f74c-47af-c046-fbe8a5fec0b6"
      },
      "source": [
        "nb_epoch = 200\n",
        "for epoch in range(1, nb_epoch + 1):\n",
        "  train_loss = 0\n",
        "  s = 0.\n",
        "  for id_user in range(nb_users):\n",
        "    input = Variable(training_set[id_user]).unsqueeze(0)\n",
        "    target = input.clone()\n",
        "    if torch.sum(target.data > 0) > 0:\n",
        "      output = sae(input)\n",
        "      target.require_grad = False\n",
        "      output[target == 0] = 0\n",
        "      loss = criterion(output, target)\n",
        "      mean_corrector = nb_movies/float(torch.sum(target.data > 0) + 1e-10)\n",
        "      loss.backward()\n",
        "      train_loss += np.sqrt(loss.data*mean_corrector)\n",
        "      s += 1.\n",
        "      optimizer.step()\n",
        "  print('epoch: '+str(epoch)+'loss: '+ str(train_loss/s))"
      ],
      "execution_count": 11,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "epoch: 1loss: tensor(1.7710)\n",
            "epoch: 2loss: tensor(1.0968)\n",
            "epoch: 3loss: tensor(1.0533)\n",
            "epoch: 4loss: tensor(1.0386)\n",
            "epoch: 5loss: tensor(1.0307)\n",
            "epoch: 6loss: tensor(1.0267)\n",
            "epoch: 7loss: tensor(1.0237)\n",
            "epoch: 8loss: tensor(1.0220)\n",
            "epoch: 9loss: tensor(1.0206)\n",
            "epoch: 10loss: tensor(1.0196)\n",
            "epoch: 11loss: tensor(1.0188)\n",
            "epoch: 12loss: tensor(1.0185)\n",
            "epoch: 13loss: tensor(1.0178)\n",
            "epoch: 14loss: tensor(1.0176)\n",
            "epoch: 15loss: tensor(1.0174)\n",
            "epoch: 16loss: tensor(1.0170)\n",
            "epoch: 17loss: tensor(1.0166)\n",
            "epoch: 18loss: tensor(1.0166)\n",
            "epoch: 19loss: tensor(1.0163)\n",
            "epoch: 20loss: tensor(1.0160)\n",
            "epoch: 21loss: tensor(1.0161)\n",
            "epoch: 22loss: tensor(1.0160)\n",
            "epoch: 23loss: tensor(1.0157)\n",
            "epoch: 24loss: tensor(1.0156)\n",
            "epoch: 25loss: tensor(1.0157)\n",
            "epoch: 26loss: tensor(1.0155)\n",
            "epoch: 27loss: tensor(1.0155)\n",
            "epoch: 28loss: tensor(1.0150)\n",
            "epoch: 29loss: tensor(1.0134)\n",
            "epoch: 30loss: tensor(1.0120)\n",
            "epoch: 31loss: tensor(1.0096)\n",
            "epoch: 32loss: tensor(1.0085)\n",
            "epoch: 33loss: tensor(1.0048)\n",
            "epoch: 34loss: tensor(1.0045)\n",
            "epoch: 35loss: tensor(1.0007)\n",
            "epoch: 36loss: tensor(0.9990)\n",
            "epoch: 37loss: tensor(0.9970)\n",
            "epoch: 38loss: tensor(0.9964)\n",
            "epoch: 39loss: tensor(0.9932)\n",
            "epoch: 40loss: tensor(0.9912)\n",
            "epoch: 41loss: tensor(0.9876)\n",
            "epoch: 42loss: tensor(0.9896)\n",
            "epoch: 43loss: tensor(0.9856)\n",
            "epoch: 44loss: tensor(0.9904)\n",
            "epoch: 45loss: tensor(0.9861)\n",
            "epoch: 46loss: tensor(0.9854)\n",
            "epoch: 47loss: tensor(0.9880)\n",
            "epoch: 48loss: tensor(0.9873)\n",
            "epoch: 49loss: tensor(0.9877)\n",
            "epoch: 50loss: tensor(0.9880)\n",
            "epoch: 51loss: tensor(0.9817)\n",
            "epoch: 52loss: tensor(0.9830)\n",
            "epoch: 53loss: tensor(0.9797)\n",
            "epoch: 54loss: tensor(0.9768)\n",
            "epoch: 55loss: tensor(0.9728)\n",
            "epoch: 56loss: tensor(0.9808)\n",
            "epoch: 57loss: tensor(0.9754)\n",
            "epoch: 58loss: tensor(0.9756)\n",
            "epoch: 59loss: tensor(0.9712)\n",
            "epoch: 60loss: tensor(0.9708)\n",
            "epoch: 61loss: tensor(0.9720)\n",
            "epoch: 62loss: tensor(0.9683)\n",
            "epoch: 63loss: tensor(0.9652)\n",
            "epoch: 64loss: tensor(0.9625)\n",
            "epoch: 65loss: tensor(0.9638)\n",
            "epoch: 66loss: tensor(0.9625)\n",
            "epoch: 67loss: tensor(0.9599)\n",
            "epoch: 68loss: tensor(0.9605)\n",
            "epoch: 69loss: tensor(0.9611)\n",
            "epoch: 70loss: tensor(0.9588)\n",
            "epoch: 71loss: tensor(0.9564)\n",
            "epoch: 72loss: tensor(0.9566)\n",
            "epoch: 73loss: tensor(0.9554)\n",
            "epoch: 74loss: tensor(0.9574)\n",
            "epoch: 75loss: tensor(0.9542)\n",
            "epoch: 76loss: tensor(0.9544)\n",
            "epoch: 77loss: tensor(0.9526)\n",
            "epoch: 78loss: tensor(0.9506)\n",
            "epoch: 79loss: tensor(0.9498)\n",
            "epoch: 80loss: tensor(0.9484)\n",
            "epoch: 81loss: tensor(0.9472)\n",
            "epoch: 82loss: tensor(0.9473)\n",
            "epoch: 83loss: tensor(0.9461)\n",
            "epoch: 84loss: tensor(0.9461)\n",
            "epoch: 85loss: tensor(0.9450)\n",
            "epoch: 86loss: tensor(0.9442)\n",
            "epoch: 87loss: tensor(0.9432)\n",
            "epoch: 88loss: tensor(0.9428)\n",
            "epoch: 89loss: tensor(0.9423)\n",
            "epoch: 90loss: tensor(0.9425)\n",
            "epoch: 91loss: tensor(0.9407)\n",
            "epoch: 92loss: tensor(0.9414)\n",
            "epoch: 93loss: tensor(0.9408)\n",
            "epoch: 94loss: tensor(0.9402)\n",
            "epoch: 95loss: tensor(0.9396)\n",
            "epoch: 96loss: tensor(0.9393)\n",
            "epoch: 97loss: tensor(0.9386)\n",
            "epoch: 98loss: tensor(0.9384)\n",
            "epoch: 99loss: tensor(0.9381)\n",
            "epoch: 100loss: tensor(0.9390)\n",
            "epoch: 101loss: tensor(0.9381)\n",
            "epoch: 102loss: tensor(0.9378)\n",
            "epoch: 103loss: tensor(0.9370)\n",
            "epoch: 104loss: tensor(0.9372)\n",
            "epoch: 105loss: tensor(0.9360)\n",
            "epoch: 106loss: tensor(0.9363)\n",
            "epoch: 107loss: tensor(0.9354)\n",
            "epoch: 108loss: tensor(0.9353)\n",
            "epoch: 109loss: tensor(0.9346)\n",
            "epoch: 110loss: tensor(0.9355)\n",
            "epoch: 111loss: tensor(0.9347)\n",
            "epoch: 112loss: tensor(0.9349)\n",
            "epoch: 113loss: tensor(0.9337)\n",
            "epoch: 114loss: tensor(0.9335)\n",
            "epoch: 115loss: tensor(0.9332)\n",
            "epoch: 116loss: tensor(0.9334)\n",
            "epoch: 117loss: tensor(0.9331)\n",
            "epoch: 118loss: tensor(0.9333)\n",
            "epoch: 119loss: tensor(0.9327)\n",
            "epoch: 120loss: tensor(0.9328)\n",
            "epoch: 121loss: tensor(0.9322)\n",
            "epoch: 122loss: tensor(0.9319)\n",
            "epoch: 123loss: tensor(0.9317)\n",
            "epoch: 124loss: tensor(0.9317)\n",
            "epoch: 125loss: tensor(0.9318)\n",
            "epoch: 126loss: tensor(0.9311)\n",
            "epoch: 127loss: tensor(0.9313)\n",
            "epoch: 128loss: tensor(0.9310)\n",
            "epoch: 129loss: tensor(0.9313)\n",
            "epoch: 130loss: tensor(0.9312)\n",
            "epoch: 131loss: tensor(0.9307)\n",
            "epoch: 132loss: tensor(0.9305)\n",
            "epoch: 133loss: tensor(0.9298)\n",
            "epoch: 134loss: tensor(0.9300)\n",
            "epoch: 135loss: tensor(0.9299)\n",
            "epoch: 136loss: tensor(0.9299)\n",
            "epoch: 137loss: tensor(0.9293)\n",
            "epoch: 138loss: tensor(0.9294)\n",
            "epoch: 139loss: tensor(0.9287)\n",
            "epoch: 140loss: tensor(0.9285)\n",
            "epoch: 141loss: tensor(0.9281)\n",
            "epoch: 142loss: tensor(0.9284)\n",
            "epoch: 143loss: tensor(0.9279)\n",
            "epoch: 144loss: tensor(0.9278)\n",
            "epoch: 145loss: tensor(0.9274)\n",
            "epoch: 146loss: tensor(0.9278)\n",
            "epoch: 147loss: tensor(0.9275)\n",
            "epoch: 148loss: tensor(0.9273)\n",
            "epoch: 149loss: tensor(0.9270)\n",
            "epoch: 150loss: tensor(0.9269)\n",
            "epoch: 151loss: tensor(0.9260)\n",
            "epoch: 152loss: tensor(0.9264)\n",
            "epoch: 153loss: tensor(0.9259)\n",
            "epoch: 154loss: tensor(0.9257)\n",
            "epoch: 155loss: tensor(0.9252)\n",
            "epoch: 156loss: tensor(0.9256)\n",
            "epoch: 157loss: tensor(0.9249)\n",
            "epoch: 158loss: tensor(0.9250)\n",
            "epoch: 159loss: tensor(0.9245)\n",
            "epoch: 160loss: tensor(0.9249)\n",
            "epoch: 161loss: tensor(0.9238)\n",
            "epoch: 162loss: tensor(0.9249)\n",
            "epoch: 163loss: tensor(0.9244)\n",
            "epoch: 164loss: tensor(0.9242)\n",
            "epoch: 165loss: tensor(0.9234)\n",
            "epoch: 166loss: tensor(0.9253)\n",
            "epoch: 167loss: tensor(0.9234)\n",
            "epoch: 168loss: tensor(0.9237)\n",
            "epoch: 169loss: tensor(0.9227)\n",
            "epoch: 170loss: tensor(0.9235)\n",
            "epoch: 171loss: tensor(0.9227)\n",
            "epoch: 172loss: tensor(0.9225)\n",
            "epoch: 173loss: tensor(0.9217)\n",
            "epoch: 174loss: tensor(0.9224)\n",
            "epoch: 175loss: tensor(0.9225)\n",
            "epoch: 176loss: tensor(0.9247)\n",
            "epoch: 177loss: tensor(0.9219)\n",
            "epoch: 178loss: tensor(0.9210)\n",
            "epoch: 179loss: tensor(0.9210)\n",
            "epoch: 180loss: tensor(0.9211)\n",
            "epoch: 181loss: tensor(0.9209)\n",
            "epoch: 182loss: tensor(0.9210)\n",
            "epoch: 183loss: tensor(0.9204)\n",
            "epoch: 184loss: tensor(0.9208)\n",
            "epoch: 185loss: tensor(0.9205)\n",
            "epoch: 186loss: tensor(0.9206)\n",
            "epoch: 187loss: tensor(0.9202)\n",
            "epoch: 188loss: tensor(0.9201)\n",
            "epoch: 189loss: tensor(0.9195)\n",
            "epoch: 190loss: tensor(0.9200)\n",
            "epoch: 191loss: tensor(0.9200)\n",
            "epoch: 192loss: tensor(0.9199)\n",
            "epoch: 193loss: tensor(0.9195)\n",
            "epoch: 194loss: tensor(0.9193)\n",
            "epoch: 195loss: tensor(0.9198)\n",
            "epoch: 196loss: tensor(0.9195)\n",
            "epoch: 197loss: tensor(0.9181)\n",
            "epoch: 198loss: tensor(0.9188)\n",
            "epoch: 199loss: tensor(0.9183)\n",
            "epoch: 200loss: tensor(0.9221)\n"
          ],
          "name": "stdout"
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "5ztvzYRtiGCz",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 34
        },
        "outputId": "d0e8ea8b-9ac4-40e5-a19a-7fcfc6934d61"
      },
      "source": [
        "test_loss = 0\n",
        "s = 0.\n",
        "for id_user in range(nb_users):\n",
        "  input = Variable(training_set[id_user]).unsqueeze(0)\n",
        "  target = Variable(test_set[id_user]).unsqueeze(0)\n",
        "  if torch.sum(target.data > 0) > 0:\n",
        "    output = sae(input)\n",
        "    target.require_grad = False\n",
        "    output[target == 0] = 0\n",
        "    loss = criterion(output, target)\n",
        "    mean_corrector = nb_movies/float(torch.sum(target.data > 0) + 1e-10)\n",
        "    test_loss += np.sqrt(loss.data*mean_corrector)\n",
        "    s += 1.\n",
        "print('test loss: '+str(test_loss/s))"
      ],
      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "test loss: tensor(0.9681)\n"
          ],
          "name": "stdout"
        }
      ]
    }
  ]
}